# Prepare the country-code / area-code REC file and upload it to HDFS.
# Each step consumes the output of the previous one, so stop at the first
# failure instead of continuing with a missing or stale intermediate file.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Convert the GBK-encoded REC file provided by Teacher Tian to UTF-8.
iconv -f GBK -t UTF-8 国家代号与区号.txt -o 国家代号与区号2.txt \
  || die "iconv: GBK to UTF-8 conversion failed"
# Rename the converted file.
mv 国家代号与区号2.txt address.txt \
  || die "mv: could not rename converted file to address.txt"
# Collapse the multi-line REC file address.txt into a single line
# (every newline is replaced by one space).
sed ':a; N;s/\n/ /; ta;' address.txt > address2.txt \
  || die "sed: joining lines of address.txt failed"
# After the merge the data is delimited by "^M " (carriage return + space);
# strip that delimiter.
sed "s/\r //g" address2.txt > address3.txt \
  || die "sed: removing CR separators from address2.txt failed"
# Upload address3.txt to HDFS.
# NOTE(review): address3.txt is written to the current directory above, but the
# upload reads /data/yzw/address3.txt; the two only coincide when this script is
# run from /data/yzw -- confirm.
hdfs dfs -put /data/yzw/address3.txt /tmp/InternationalData_source \
  || die "hdfs: upload of address3.txt failed"
# HDFSToJson has already been run by HDFSToJson.sh.
# First Spark stage: submit the ..._ZL class to YARN in cluster deploy mode.
# Abort when the submission fails: the following ...JsonToREC stage presumably
# consumes this stage's output (TODO confirm), so running it after a failure
# would work on missing or stale data. This relies on spark-submit returning a
# non-zero status when the application fails (the default behaviour while
# spark.yarn.submit.waitAppCompletion is left at true).
spark-submit \
--master yarn \
--deploy-mode cluster \
--num-executors 8 \
--executor-memory 11G \
--driver-memory 2G \
--executor-cores 3 \
--conf spark.network.timeout=10000000 \
--conf spark.yarn.preserve.staging.files=true \
--class com.cnki.result_transform.YEARBOOKTABLEDATA2008_FORMAL_REPORTGROUPER_GUOJI_2019_ZL \
/data/yzw/original-makeChange-1.0-SNAPSHOT.jar || {
  status=$?
  printf '%s\n' "spark-submit failed for YEARBOOKTABLEDATA2008_FORMAL_REPORTGROUPER_GUOJI_2019_ZL (exit ${status})" >&2
  exit "${status}"
}

# Second Spark stage: submit the ...JsonToREC class to YARN in cluster deploy
# mode. This is the last command, so its exit status becomes the script's.
rec_job_class=com.cnki.result_transform.YEARBOOKTABLEDATA2008_FORMAL_REPORTGROUPER_GUOJI_2019_ZLJsonToREC
rec_job_jar=/data/yzw/original-makeChange-1.0-SNAPSHOT.jar

spark-submit \
  --master yarn --deploy-mode cluster \
  --num-executors 8 --executor-memory 11G \
  --driver-memory 2G --executor-cores 3 \
  --conf spark.network.timeout=10000000 \
  --conf spark.yarn.preserve.staging.files=true \
  --class "${rec_job_class}" \
  "${rec_job_jar}"
